#ifndef ENV_H
#define ENV_H

#include <random>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

/// Tabular, finite-horizon Markov decision process with a finite number of
/// states and actions.
///
/// The model is stored as a transition tensor `P` (shape [H][S][A][S]) and a
/// reward tensor `R` (shape [H][S][A]). The object also tracks the current
/// time step and state of an episode driven through reset() / step().
///
/// The class holds its random number generator by reference, so the implicit
/// copy-assignment operator is deleted and the referenced generator must
/// outlive this object.
class FiniteStateFiniteActionMDP {
public:
    // MDP structure. Default-initialised so the fields never hold
    // indeterminate values, whatever a given constructor chooses to set.
    int H = 0;  ///< Horizon.
    int S = 0;  ///< Number of states.
    int A = 0;  ///< Number of actions.

    /// Construct an MDP with the given dimensions.
    ///
    /// @param H    horizon
    /// @param S    number of states
    /// @param A    number of actions
    /// @param gen  random number generator; presumably bound to the `gen`
    ///             reference member and used to build/sample the model --
    ///             confirm against the implementation. Must outlive this
    ///             object if it is stored.
    FiniteStateFiniteActionMDP(int H, int S, int A, std::mt19937& gen);

    /// Construct an MDP whose model is loaded from CSV files.
    ///
    /// @param H                horizon
    /// @param S                number of states
    /// @param A                number of actions
    /// @param transition_file  path of the CSV file holding the transitions
    /// @param reward_file      path of the CSV file holding the rewards
    ///
    /// NOTE(review): this overload receives no generator, yet the class has a
    /// reference member `gen` that every constructor has to bind. Confirm
    /// what the implementation binds it to and that it stays valid.
    FiniteStateFiniteActionMDP(int H, int S, int A,
                               const std::string& transition_file,
                               const std::string& reward_file);

    // ---- Environment control ------------------------------------------------

    /// Reset the environment.
    /// @return the initial state.
    int reset();

    /// Take an action in the current state.
    /// @param action  index of the action to take (presumably in [0, A) --
    ///                TODO confirm whether the implementation validates it).
    /// @return (next_state, reward).
    std::pair<int, float> step(int action);

    /// Save P and R to disk.
    /// @param n       presumably an index identifying the saved environment
    ///                (e.g. part of the file name) -- TODO confirm.
    /// @param folder  destination folder.
    void save_env(int n, const std::string& folder);

    /// Load P and R from disk.
    /// @param n       presumably the same identifier used by save_env()
    ///                -- TODO confirm.
    /// @param folder  source folder.
    void load_env(int n, const std::string& folder);

    // ---- Value estimation and policy generation -----------------------------

    /// Evaluate the supplied `actions` and return the full value tables.
    /// @param actions  three-level nested array of floats; presumably a
    ///                 (possibly stochastic) policy indexed [H][S][A]
    ///                 -- TODO confirm.
    /// @return a tuple (3-D float array, 2-D float array); presumably the
    ///         action-value and state-value tables -- TODO confirm.
    std::tuple<std::vector<std::vector<std::vector<float>>>, std::vector<std::vector<float>>>
    full_value_gen(const std::vector<std::vector<std::vector<float>>>& actions);

    /// Evaluate the supplied `actions` and return a one-dimensional result.
    /// @param actions  same layout as for full_value_gen().
    /// @return a vector of floats; presumably one value per state
    ///         -- TODO confirm.
    std::vector<float>
    value_gen(const std::vector<std::vector<std::vector<float>>>& actions);

    /// Compute the best (optimal) quantities for this MDP.
    /// @return a tuple (1-D float array, 3-D float array, 3-D float array);
    ///         the meaning of each component is defined by the
    ///         implementation -- TODO document once confirmed.
    std::tuple<std::vector<float>,
               std::vector<std::vector<std::vector<float>>>,
               std::vector<std::vector<std::vector<float>>>>
    best_gen();

private:
    // Episode bookkeeping. Default-initialised so that reading them before
    // the first reset() cannot observe indeterminate values.
    int t = 0;      // current time step
    int state = 0;  // current state

    // Random number generator (held by reference, not owned). The referenced
    // generator must outlive this object.
    std::mt19937& gen;

    // Transition probabilities and rewards
    std::vector<std::vector<std::vector<std::vector<float>>>> P; // shape: [H][S][A][S]
    std::vector<std::vector<std::vector<float>>> R;              // shape: [H][S][A]
};

#endif // ENV_H